001 /* 002 * CondorGLSFDispatcher.java 003 * 004 * Created on July 17, 2003, 11:17 AM 005 * 006 * This file is part of the STAR Scheduler. 007 * Copyright (c) 2002-2003 STAR Collaboration - Brookhaven National Laboratory 008 * 009 * STAR Scheduler is free software; you can redistribute it and/or modify 010 * it under the terms of the GNU General Public License as published by 011 * the Free Software Foundation; either version 2 of the License, or 012 * (at your option) any later version. 013 * 014 * STAR Scheduler is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * along with STAR Scheduler; if not, write to the Free Software 021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 022 */ 023 package gov.bnl.star.offline.scheduler.condorg; 024 025 import gov.bnl.star.offline.scheduler.*; 026 import gov.bnl.star.offline.scheduler.Dispatchers.lsf.CSHApplication; 027 import gov.bnl.star.offline.scheduler.Dispatchers.lsf.LSFDispatcher; 028 import gov.bnl.star.offline.scheduler.util.CSHCommandLineTask; 029 import gov.bnl.star.offline.scheduler.util.FilesystemToolkit; 030 //import gov.bnl.star.offline.scheduler.util.StatisticsRecorder; //Moved Statistics recording to Scheduler.java LH 031 import gov.bnl.star.offline.scheduler.util.GenericResourceRequirementStringDefinition; 032 033 import java.io.File; 034 import java.io.FileOutputStream; 035 import java.io.PrintStream; 036 import java.util.*; 037 038 import java.util.logging.Level; 039 import java.util.logging.Logger; 040 041 042 /** Dispatches jobs using Condor-G on a remote site that uses LSF. It will use some 043 * extra rsl attributes created to command some extra features such as mail 044 * notification, resource usage, job name and target machine. These extra LSF 045 * attribute require a patch to the LSF job manager. 046 * @author Gabriele Carcassi 047 * @version 1.0 2003/07/23 048 */ 049 public class CondorGLSFDispatcher extends LSFDispatcher { 050 static private Logger log = Logger.getLogger(CondorGLSFDispatcher.class.getName()); 051 052 private static String condorEx; 053 protected CSHApplication application; 054 055 private String ResReqDefinitionObj; 056 057 public void setResourceRequirementStringDefinition(String ResReqDefinitionObj){ 058 this.ResReqDefinitionObj = ResReqDefinitionObj; 059 060 } 061 062 public void setCondorEx(String condorEx) { 063 this.condorEx = condorEx; 064 } 065 066 public String getCondorEx() { 067 return condorEx; 068 } 069 070 /** Creates a new dispatcher */ 071 public CondorGLSFDispatcher() { 072 } 073 074 /** Creates the scripts and dispatches the job on the target machine. 075 * @param request the job request 076 */ 077 public void dispatch(Request request, List jobs) { 078 log.info("Dispatching using Condor-g and LSF: \"" + request.getCommand() + 079 "\""); 080 081 // Enables the simulation mode if necessary 082 useSimulationMode(request.getSimulation()); 083 reportedFailure = false; 084 085 // Submits from the higher to the lower JobID. This way the 086 // user has a feel of when the last job is going to be 087 // submitted 088 for (int nProcess = jobs.size() - 1; nProcess >= 0; 089 nProcess--) { 090 Job job = (Job) jobs.get(nProcess); 091 092 System.out.print("Dispatching process " + 093 job.getJobID() + "."); 094 dispatch(request, job); 095 } 096 097 //StatisticsRecorder.getIntance().recordStatistics(request, jobs); //Moved Statistics recording to Scheduler.java LH 098 } 099 100 protected void dispatch(Request request, Job job) { 101 application = (CSHApplication) ComponentLibrary.getInstance().getComponent("CSHApplication"); 102 103 // TODO: all the parameters should be passed in one go 104 application.setJob(request, job); 105 application.setScratchDir(scratchDir); 106 application.setSubmissionCommand(getCondorGCommand(request, job)); 107 108 application.prepareJob(); 109 prepareClassAd(request, job); 110 111 log.info("Executing \"" + getCondorGCommand(request, job) + "\""); 112 113 if (!simulation) { 114 try { 115 Thread.sleep(getMsBtwnSuccess()); 116 } catch (Exception e) { 117 } 118 119 int attempt = 0; 120 boolean success = false; 121 122 while (!success && (attempt < getMaxAttempts())) { 123 try { 124 CSHCommandLineTask task = new CSHCommandLineTask(getCondorGCommand( 125 request, job), true, 30000); 126 task.execute(); 127 128 if (task.getExitStatus() != 0) { 129 log.warning("bsub failed: " + task.getOutput()); 130 Thread.sleep(getMsBtwnFailure()); 131 System.out.print("/"); 132 attempt++; 133 } else { 134 success = true; 135 } 136 } catch (Exception e) { 137 log.log(Level.SEVERE, 138 "Couldn't submit the script to Condor-g", e); 139 140 try { 141 Thread.sleep(getMsBtwnFailure()); 142 } catch (Exception e1) { 143 } 144 145 System.out.print("/"); 146 attempt++; 147 } 148 } 149 150 if (success) { 151 System.out.println(" done."); 152 } else { 153 System.out.println(" FAILED!!"); 154 } 155 } else { 156 System.out.println(" simulated."); 157 } 158 } 159 160 /** Returns the command line to submit the job through condor-g. 161 * @param request the request that originated the job 162 * @param job the job to be dispatched 163 * @return the commandline to submit the job 164 */ 165 protected String getCondorGCommand(Request request, Job job) { 166 return condorEx + " " + getClassAdName(request, job); 167 } 168 169 /** Returns the name of the file containing the class ad. Class ad is the job 170 * description required by condor to submit a job. 171 * @param request the request that originated the job 172 * @param job the job to be submitted 173 * @return the file name of the class ad 174 */ 175 protected String getClassAdName(Request request, Job job) { 176 return "sched" + job.getJobID() + ".condorg"; 177 } 178 179 private void prepareClassAd(Request request, Job job) { 180 try { 181 PrintStream classAd = new PrintStream(new FileOutputStream( 182 new File(getClassAdName(request, job)))); 183 createClassAd(request, job, classAd); 184 } catch (Exception e) { 185 log.log(Level.SEVERE, "Couldn't create the class ad", e); 186 throw new RuntimeException("Couldn't create the class ad " + 187 getClassAdName(request, job) + ": " + e.getMessage()); 188 } 189 } 190 191 private void createClassAd(Request request, Job job, 192 PrintStream classAd) { 193 classAd.print("executable = "); 194 classAd.println(getExecutable()); 195 196 if (getArguments() != null) { 197 classAd.print("arguments = "); 198 classAd.println(getArguments()); 199 } 200 201 classAd.print("globusscheduler = "); 202 classAd.println(getGlobusScheduler()); 203 204 if (application.getStdin() != null) { 205 classAd.print("input = "); 206 classAd.println(application.getStdin()); 207 } 208 209 if (application.getStdout() != null) { 210 classAd.print("output = "); 211 classAd.println(application.getStdout()); 212 } 213 214 if (application.getStderr() != null) { 215 classAd.print("error = "); 216 classAd.println(application.getStderr()); 217 } 218 219 classAd.print("log = "); 220 classAd.println(getLogName(job)); 221 222 if (getRemoteDirectory() != null) { 223 classAd.print("remote_initialdir = "); 224 classAd.println(getRemoteDirectory()); 225 } 226 227 classAd.print("globusrsl ="); 228 229 if (job.getTarget() != null) { 230 classAd.print(" (xlsfmachine = "); 231 classAd.print(job.getTarget()); 232 classAd.print(")"); 233 } 234 235 if (application.getJobName() != null) { 236 classAd.print(" (xlsfjobname = "); 237 classAd.print(application.getJobName()); 238 classAd.print(")"); 239 } 240 241 if (request.getMail()) { 242 classAd.print(" (xlsfmailreport = "); 243 classAd.print("false"); 244 classAd.print(")"); 245 } else { 246 classAd.print(" (xlsfmailreport = "); 247 classAd.print("true"); 248 classAd.print(")"); 249 } 250 ////////////lbh 251 252 GenericResourceRequirementStringDefinition lsfResReqDef = new GenericResourceRequirementStringDefinition(); 253 if(ResReqDefinitionObj != null) 254 lsfResReqDef = (GenericResourceRequirementStringDefinition) ComponentLibrary.getInstance().getComponent(ResReqDefinitionObj); 255 256 if ((getResourceUsageSwitch(job) != null)&&( lsfResReqDef.hasResourcesDefinition(job))) { 257 258 String SD = "rusage" + getResourceUsageSwitch(job).subSequence(getResourceUsageSwitch(job).indexOf("["),getResourceUsageSwitch(job).indexOf("]")).toString() + "]"; 259 String Res = "\\\"( " + lsfResReqDef.makeString(job).replaceAll("\\\"", "").concat(" ) ").concat(SD).concat("\\\""); 260 classAd.print(" (xlsfresources = "); 261 classAd.print(Res); 262 classAd.print(")"); 263 } 264 else if(getResourceUsageSwitch(job) != null){ 265 classAd.print(" (xlsfresources = "); 266 classAd.print(getResourceUsageSwitch(job)); 267 classAd.print(")"); 268 } 269 else if( lsfResReqDef.hasResourcesDefinition(job)){ 270 classAd.print(" (xlsfresources = "); 271 classAd.print(lsfResReqDef.makeString(job)); 272 classAd.print(")"); 273 } 274 275 276 // if (getResourceUsageSwitch(job) != null) { 277 // classAd.print(" (xlsfresources = "); 278 // classAd.print(getResourceUsageSwitch(job)); 279 // classAd.print(")"); 280 // } 281 282 if (job.getQueue() != null) { 283 classAd.print(" (queue = "); 284 classAd.print(job.getQueue()); 285 classAd.print(")"); 286 } 287 288 classAd.println(); 289 290 if (isTransferExecutable()) { 291 classAd.println("transfer_executable = true"); 292 } else { 293 classAd.println("transfer_executable = false"); 294 } 295 classAd.println("notification = never"); 296 classAd.println("universe = globus"); 297 classAd.println("queue"); 298 } 299 300 private String getExecutable() { 301 if (application.getCommandLine().indexOf(' ') == -1) { 302 return application.getCommandLine(); 303 } 304 305 return application.getCommandLine().substring(0, 306 application.getCommandLine().indexOf(' ')); 307 } 308 309 private String getArguments() { 310 if (application.getCommandLine().indexOf(' ') == -1) { 311 return null; 312 } 313 314 return application.getCommandLine().substring(application.getCommandLine().indexOf(' ') + 1); 315 } 316 317 private String getLogName(Job job) { 318 // TODO maybe log filename should be put as a general property of Process (as stds) 319 return "sched" + job.getJobID() + ".condorg.log"; 320 } 321 322 private String getGlobusScheduler() { 323 //TODO make it flexible 324 return getGlobusGatekeeper(); 325 } 326 327 private String gatekeeper; 328 329 /** Holds value of property transferExecutable. */ 330 private boolean transferExecutable; 331 332 public void setGlobusGatekeeper(String gatekeeper) { 333 this.gatekeeper = gatekeeper; 334 } 335 336 public String getGlobusGatekeeper() { 337 return gatekeeper; 338 } 339 340 private String remoteInitialDir; 341 342 public void setRemoteInitialDir(String remoteInitialDir) { 343 this.remoteInitialDir = remoteInitialDir; 344 } 345 346 public String getRemoteInitialDir() { 347 return remoteInitialDir; 348 } 349 350 private String getRemoteDirectory() { 351 // TODO this has to be specified better: remote execution directory could be different from scheduler execution directory 352 if (".".equals(getRemoteInitialDir())) return FilesystemToolkit.getCurrentDirectory(); 353 return getRemoteInitialDir(); 354 } 355 356 protected String getResourceUsageSwitch(Job job) { 357 String res = super.getResourceUsageSwitch(job); 358 if (res == null) return res; 359 360 return res.replaceAll("\"", "\\\\\""); 361 } 362 363 /** Getter for property transferExecutable. 364 * @return Value of property transferExecutable. 365 * 366 */ 367 public boolean isTransferExecutable() { 368 return this.transferExecutable; 369 } 370 371 /** Setter for property transferExecutable. 372 * @param transferExecutable New value of property transferExecutable. 373 * 374 */ 375 public void setTransferExecutable(boolean transferExecutable) { 376 this.transferExecutable = transferExecutable; 377 } 378 379 }